#Name:    mission_coding.R
#Data:    budget_missions.xlsx
#Purpose: Code agency mission based on 2003 GAO report that matches budget function to agencies
#Output:  A stata file for finishing the mission coding - missions_for_stata.dta
#Date:    04/04/2017


#Notes: Budget functions have a numerical categorization system characterized by a function and subfunction,
# each with a three digit numerical code. Each function is denoted by a number ending in zero and subfunctions
# add one to the number, e.g., Transportation is 400 and water transportation is 403.
# I will use numbers to code functions because that is easier than manipulating the long character strings.

library(xlsx)

######################################################################
#create a matrix that maps subfunction codes into function codes
######################################################################

fn<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/function_subfunction.xlsx",
               sheetIndex=1,header=T, stringsAsFactors=F)
names(fn)<-c("fn","subfn") #remove period from function, and rename to omit reserved word function

fn$fn_code<-NA
fn$subfn_code<-NA

fn$x<-strsplit(fn$fn,"(",fixed=T)
for(i in 1:nrow(fn)){
  fn$fn_code[i]<-fn$x[[i]][2]
}
fn$fn_code<-sub(")","",fn$fn_code,fixed=T)

fn$x<-strsplit(fn$subfn,"(",fixed=T)
for(i in 1:nrow(fn)){
  fn$subfn_code[i]<-fn$x[[i]][2]
}


#correct cases with additional parentheses
z<-grep("General retirement and disability insurance (excluding Social Security) (601)",fn$subfn,fixed=T)
fn$subfn_code[z]<-fn$x[[z]][3]

z<-grep("Interest on Treasury debt securities (gross) (901)",fn$subfn,fixed=T)
fn$subfn_code[z]<-fn$x[[z]][3]

fn$subfn_code<-sub(")","",fn$subfn_code,fixed=T)

codes2<-unique(fn$fn_code)
codes2<-substr(codes2,1,2)

######################################################################
#Load budget data
######################################################################

usda<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=1,
                header=F, stringsAsFactors=F)
usda<-usda[-1*c(nrow(usda),(nrow(usda)-1)),] #remove empty row and row of totals

doc<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=2,
                 header=F, stringsAsFactors=F)
doc<-doc[-1*c(nrow(doc),(nrow(doc)-1)),] #remove empty row and row of totals

#DOD agencies not identified

ed<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=4,
                header=F, stringsAsFactors=F)
ed<-ed[-1*c(nrow(ed),(nrow(ed)-1)),] #remove empty row and row of totals

doe<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=5,
                header=F, stringsAsFactors=F)
doe<-doe[-1*c(nrow(doe),(nrow(doe)-1)),] #remove empty row and row of totals

hhs<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=6,
                header=F, stringsAsFactors=F)
hhs<-hhs[-1*c(nrow(hhs),(nrow(hhs)-1)),] #remove empty row and row of totals

dhs<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=7,
                header=F, stringsAsFactors=F)
dhs<-dhs[-1*c(nrow(dhs),(nrow(dhs)-1)),] #remove empty row and row of totals

hud<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=8,
                header=F, stringsAsFactors=F)
hud<-hud[-1*c(nrow(hud),(nrow(hud)-1)),] #remove empty row and row of totals

doi<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=9,
                header=F, stringsAsFactors=F)
doi<-doi[-1*c(nrow(doi),(nrow(doi)-1)),] #remove empty row and row of totals

doj<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=10,
                header=F, stringsAsFactors=F)
doj<-doj[-1*c(nrow(doj),(nrow(doj)-1)),] #remove empty row and row of totals

dol<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=11,
                header=F, stringsAsFactors=F)
dol<-dol[-1*c(nrow(dol),(nrow(dol)-1)),] #remove empty row and row of totals

#DOS agencies not identified

dot<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=13,
                header=F, stringsAsFactors=F)
dot<-dot[-1*c(nrow(dot),(nrow(dot)-1)),] #remove empty row and row of totals

treas<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=14,
                header=F, stringsAsFactors=F)
treas<-treas[-1*c(nrow(treas),(nrow(treas)-1)),] #remove empty row and row of totals

#VA doesn't break out agencies

epa<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=16,
                header=F, stringsAsFactors=F)
epa<-epa[-1*c(nrow(epa),(nrow(epa)-1)),] #remove empty row and row of totals

#EOP not seperated

gsa<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=18,
                header=F, stringsAsFactors=F)
gsa<-gsa[-1*c(nrow(gsa),(nrow(gsa)-1)),] #remove empty row and row of totals

nasa<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=19,
                header=F, stringsAsFactors=F)
nasa<-nasa[-1*c(nrow(nasa),(nrow(nasa)-1)),] #remove empty row and row of totals

nsf<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=20,
                header=F, stringsAsFactors=F)
nsf<-nsf[-1*c(nrow(nsf),(nrow(nsf)-1)),] #remove empty row and row of totals

nrc<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=21,
                header=F, stringsAsFactors=F)
nrc<-nrc[-1*c(nrow(nrc),(nrow(nrc)-1)),] #remove empty row and row of totals

opm<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=22,
                header=F, stringsAsFactors=F)
opm<-opm[-1*c(nrow(opm),(nrow(opm)-1)),] #remove empty row and row of totals

usps<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=23,
                header=F, stringsAsFactors=F)
usps<-usps[-1*c(nrow(usps),(nrow(usps)-1)),] #remove empty row and row of totals

sba<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=24,
                header=F, stringsAsFactors=F)
sba<-sba[-1*c(nrow(sba),(nrow(sba)-1)),] #remove empty row and row of totals

ssa<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=25,
                header=F, stringsAsFactors=F)
ssa<-ssa[-1*c(nrow(ssa),(nrow(ssa)-1)),] #remove empty row and row of totals

ind<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/budget_missions.xlsx",sheetIndex=26,
                header=F, stringsAsFactors=F)
ind<-ind[-1*c(nrow(ind),(nrow(ind)-1)),] #remove empty row and row of totals


######################################################################
#create a function to code spreadsheets using function and subfunction codes
######################################################################

#want to produce a data set that has each agency coded for each policy area as a proportion of spending
mission<-function(data){
  #create an empty data frame with full set of policy functions
  z<-matrix(NA,nrow=nrow(data)-1,ncol=length(codes2)+1) #there are 17 policy areas, plus need an agency name column; first row is policy subfunction list
  z<-as.data.frame(z)
  names(z)<-c("agency",unique(fn$fn_code))

  #extract agency codes from data
  n<-"agency"

  for(i in 2:ncol(data)-1){
    q<-strsplit(data[1,i],"(",fixed=T)
    if(length(q[[1]])==2) n<-c(n,q[[1]][2])
    if(length(q[[1]])==3) n<-c(n,q[[1]][3])
  }

  n<-sub(")","",n,fixed=T)

  names(data)<-c(n,"total")

  z$agency<-data$agency[-1] #transfer agency names

  for(i in 2:nrow(data)){#row
    for(k in 1:length(codes2)){#codes
      s<-0 #create an object to hold subfunctions from same function
        for(j in 2:ncol(data)-1){#column
        if(is.na(as.numeric(data[i,j]))==F){
          if(as.numeric(data[i,j])>0 & substr(names(data)[j],1,2)==codes2[k]){
          s<-as.numeric(data[i,j])/as.numeric(data$total[i])+s
          z[z$agency==data$agency[i],substr(names(z),1,2)==codes2[k]]<-s
          }
        }
      }
    }
  }

  z$total<-NA
  for(i in 1:nrow(z)){
    z$total[i]<-sum(z[i,2:(ncol(z)-1)],na.rm=T)
  }
  return(z)
}


######################################################################
#get mission proportions
######################################################################

usdam<-mission(data=usda)
docm<-mission(data=doc)
#no dod
edm<-mission(data=ed)
doem<-mission(data=doe)
hhsm<-mission(data=hhs)
dhsm<-mission(data=dhs)
hudm<-mission(data=hud)
doim<-mission(data=doi)
dojm<-mission(data=doj)
dolm<-mission(data=dol)
#no dos
dotm<-mission(data=dot)
treasm<-mission(data=treas)
#no va
epam<-mission(data=epa)
#no eop
gsam<-mission(data=gsa)
nasam<-mission(data=nasa)
nsfm<-mission(data=nsf)
nrcm<-mission(data=nrc)
opmm<-mission(data=opm)
uspsm<-mission(data=usps)
sbam<-mission(data=sba)
ssam<-mission(data=ssa)
indm<-mission(data=ind)

msn<-rbind(usdam,docm,edm,doem,hhsm,dhsm,hudm,doim,dojm,dolm,dotm,treasm,epam,gsam,nasam,
           nsfm,nrcm,opmm,uspsm,sbam,ssam,indm)

nrow(msn)
table(msn$total) #all 1's, means all spending is categorized

#export for printing and manual inspection
#write.xlsx2(msn,
#file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/missions_coded.xlsx")

######################################################################
#code agency names to match survey
######################################################################

msn$agency[msn$agency=="Bureau of the Census"]<-"U.S. Census Bureau"
#discretion
msn$agency[msn$agency=="Economic and Statistical Analysis"]<-"Economics and Statistics Administration"
msn$agency[msn$agency=="Federal Student Aid"]<-"Office of Federal Student Aid"
msn$agency[msn$agency=="Indian Health Services"]<-"Indian Health Service"
msn$agency[msn$agency=="Emergency Preparedness and Response"]<-"Federal Emergency Management Agency"
msn$agency[msn$agency=="United States Coast Guard"]<-"Coast Guard"
msn$agency[msn$agency=="United States Secret Service"]<-"Secret Service"
msn$agency[msn$agency=="Citizenship and Immigration Services"]<-"United States Citizenship and Immigration Services"
msn$agency[msn$agency=="Public and Indian Housing Programs"]<-"Office of Public and Indian Housing"
msn$agency[msn$agency=="United States Fish and Wildlife Service"]<-"U.S. Fish and Wildlife Service"
msn$agency[msn$agency=="United States Geological Survey"]<-"U.S. Geological Survey"
msn$agency[msn$agency=="Bureau of Alcohol, Tobacco, Firearms, and Explosives"]<-"Bureau of Alcohol, Tobacco, Firearms and Explosives"
#discretion
msn$agency[msn$agency=="Legal Activities and U.S. Marshals"]<-"U.S. Marshals Service"
#discretion
msn$agency[msn$agency=="Employee benefits security administration"]<-"Employee Benefits Security Administration"
msn$agency[msn$agency=="Federal Prison System"]<-"Bureau of Prisons"
msn$agency[msn$agency=="Comptroller of the Currency"]<-"Office of the Comptroller of the Currency"
msn$agency[msn$agency=="United States Mint"]<-"U.S. Mint"
msn$agency[msn$agency=="Agency for International Development"]<-"United States Agency for International Development"
msn$agency[msn$agency=="Postal Service"]<-"United States Postal Service"
msn$agency[msn$agency=="African Development Foundation"]<-"United States African Development Foundation"
msn$agency[msn$agency=="Export-Import Bank of the United States"]<-"Export-Import Bank of the U.S."
#discretion
msn$agency[msn$agency=="Federal Housing Finance Board"]<-"Federal Housing Finance msn$agency"
msn$agency[msn$agency=="International Trade Commission"]<-"United States International Trade Commission"
msn$agency[msn$agency=="Trade and Development Agency"]<-"United States Trade and Development Agency"

######################################################################
#create a file for stata
######################################################################

ag<-read.xlsx2(file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/agency_list.xlsx",
               sheetIndex=1,header=F, stringsAsFactors=F)
names(ag)<-c("agency","dept")

ch<-merge(ag,msn,by="agency",all.x=T)

#create variable names for stata
z<-names(ch)
z[3:20]<-paste("m", z[3:20],sep="")
names(ch)<-z

#export to stata for remainder of coding
library(readstata13)

save.dta13(ch,file="C:/Users/richar33/Dropbox/Papers/expertise/budget_missions/missions_for_stata.dta")





